import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
Read the files and save them as DataFrame objects
#Read all files
path1 = 'C:/Users/gochicken/SisFall_dataset/SisFall_dataset/SAnumber'
path2 = 'C:/Users/gochicken/SisFall_dataset/SisFall_dataset/SEnumber'  # Change the route to your local copy of the dataset
path_list = []
# Build one folder path per adult subject (SA01..SA23). 'number' is a
# placeholder replaced by the zero-padded subject index; zfill avoids the
# duplicated if/else branch for indices below 10.
for n in range(1, 24):
    path_list.append(path1.replace('number', str(n).zfill(2)))
# Save each trial to a global DataFrame and divide the object names into four groups.
slipfall_objects = []
tripfall_objects = []
walking_slowly_objects = []
walking_quickly_objects = []
g = globals()
# Slip-induced falls: one F01 trial per adult subject.
for folder in path_list:
    subject = folder.split('/')[-1]
    key = subject + 'slipfall'
    frame = pd.read_csv(folder + '/F01_' + subject + '_R01.txt', header=None)
    # The last column of the raw files carries a trailing ';'; strip it and cast to int.
    frame.iloc[:, -1] = frame.iloc[:, -1].str.replace(';', '').astype('int')
    g[key] = frame
    slipfall_objects.append(key)
# Trip-induced falls: one F04 trial per adult subject.
for folder in path_list:
    subject = folder.split('/')[-1]
    key = subject + 'tripfall'
    frame = pd.read_csv(folder + '/F04_' + subject + '_R01.txt', header=None)
    # Strip the trailing ';' from the last column and cast it to int.
    frame.iloc[:, -1] = frame.iloc[:, -1].str.replace(';', '').astype('int')
    g[key] = frame
    tripfall_objects.append(key)
# Append the elderly subjects' folders (SE01..SE15) to path_list so the
# walking trials below are read for both adult and elderly participants.
# zfill replaces the duplicated if/else zero-padding branch.
for n in range(1, 16):
    path_list.append(path2.replace('number', str(n).zfill(2)))
# Slow-walking trials (D01). path_list now holds both SA and SE folders,
# so all 38 participants are read.
for folder in path_list:
    subject = folder.split('/')[-1]
    key = subject + 'walking_slowly'
    frame = pd.read_csv(folder + '/D01_' + subject + '_R01.txt', header=None)
    # Strip the trailing ';' from the last column and cast it to int.
    frame.iloc[:, -1] = frame.iloc[:, -1].str.replace(';', '').astype('int')
    g[key] = frame
    walking_slowly_objects.append(key)
# Quick-walking trials (D02) for the same 38 participants.
for folder in path_list:
    subject = folder.split('/')[-1]
    key = subject + 'walking_quickly'
    frame = pd.read_csv(folder + '/D02_' + subject + '_R01.txt', header=None)
    # Strip the trailing ';' from the last column and cast it to int.
    frame.iloc[:, -1] = frame.iloc[:, -1].str.replace(';', '').astype('int')
    g[key] = frame
    walking_quickly_objects.append(key)
Normalize the raw data to international units (g, °/s) and convert every DataFrame to a NumPy ndarray object
# Scale factors per sensor, as given in the SisFall documentation:
# value_in_units = (2 * range) / (2 ** resolution_bits) * raw_value.
_ADXL345_SCALE = (2 * 16) / (2 ** 13)    # accelerometer 1: +/-16 g, 13-bit
_ITG3200_SCALE = (2 * 2000) / (2 ** 16)  # gyroscope: +/-2000 deg/s, 16-bit
_MMA8451Q_SCALE = (2 * 8) / (2 ** 14)    # accelerometer 2: +/-8 g, 14-bit
for object_name in slipfall_objects + tripfall_objects + walking_slowly_objects + walking_quickly_objects:
    # Direct scalar multiplication is equivalent to the per-column apply(lambda),
    # but clearer and vectorized in one step.
    g[object_name].iloc[:, :3] = g[object_name].iloc[:, :3] * _ADXL345_SCALE
    g[object_name].iloc[:, 3:6] = g[object_name].iloc[:, 3:6] * _ITG3200_SCALE
    g[object_name].iloc[:, 6:] = g[object_name].iloc[:, 6:] * _MMA8451Q_SCALE
    g[object_name] = g[object_name].to_numpy()
Parse the first 15 s of the walking-slowly and walking-quickly tests
walking_slowly_objects_1st15s = []
walking_quickly_objects_1st15s = []
# At 200 Hz, the first 3000 rows cover the first 15 seconds of each trial.
for source_names, target_names in ((walking_slowly_objects, walking_slowly_objects_1st15s),
                                   (walking_quickly_objects, walking_quickly_objects_1st15s)):
    for base_name in source_names:
        g[base_name + '_1st15s'] = g[base_name][:3000, :]
        target_names.append(base_name + '_1st15s')
Slow walking vs. fast walking (ADXL345 data)
fig = plt.figure(figsize=(16, 150))

def _plot_adxl345(axes, data, title):
    # One subplot: x/y/z ADXL345 signals against time in seconds (200 Hz).
    t = np.arange(3000) / 200
    axes.plot(t, data[:, 0], c='blue', label='x')
    axes.plot(t, data[:, 1], c='green', label='y')
    axes.plot(t, data[:, 2], c='red', label='z')
    axes.set_ylim(-4, 4)
    axes.set_title(title)
    axes.legend()

# Left column (odd slots): slow walking; right column (even slots): quick walking.
for n, object_name in enumerate(walking_slowly_objects_1st15s, start=1):
    _plot_adxl345(fig.add_subplot(38, 2, 2 * n - 1), g[object_name], object_name)
for n, object_name in enumerate(walking_quickly_objects_1st15s, start=1):
    _plot_adxl345(fig.add_subplot(38, 2, 2 * n), g[object_name], object_name)
plt.show()
It seems that not every participant starts walking immediately after the accelerometer begins recording — for example SA03, SE04 and SE05 wait for several seconds after the instruction. Therefore, it is necessary to parse the time-series data starting after the 6th second.
Parse time series data after the 6th second
def _clip_after_6s(base_names):
    # Keep samples 1200:4200 (6 s .. 21 s at 200 Hz), store each slice as
    # <name>_15s in globals, and return the new object names.
    clipped_names = []
    for base_name in base_names:
        g[base_name + '_15s'] = g[base_name][1200:4200, :]
        clipped_names.append(base_name + '_15s')
    return clipped_names

walking_slowly_objects_15s = _clip_after_6s(walking_slowly_objects)
walking_quickly_objects_15s = _clip_after_6s(walking_quickly_objects)
fig = plt.figure(figsize=(16, 150))
time_axis = np.arange(3000) / 200
# Column 1 (odd slots): slow walking; column 2 (even slots): quick walking.
for column, group in ((1, walking_slowly_objects_15s), (2, walking_quickly_objects_15s)):
    for n, object_name in enumerate(group, start=1):
        ax = fig.add_subplot(38, 2, 2 * n - 2 + column)
        for axis_index, (colour, axis_label) in enumerate((('blue', 'x'),
                                                           ('green', 'y'),
                                                           ('red', 'z'))):
            ax.plot(time_axis, g[object_name][:, axis_index], c=colour, label=axis_label)
        ax.set_ylim(-4, 4)
        ax.set_title(object_name)
        ax.legend()
plt.show()
By comparing the pairwise figures, I think the quick-ambulation group demonstrates more intense crests, especially in the y-axis signal (green line), which indicates a higher cadence. So I'm going to see whether we can extract the number of peaks per second. I referenced the gait-dynamics extraction process of the sensormotion package on GitHub: https://github.com/sho-87/sensormotion
Noise filtering
import sensormotion as sm

# Create a time vector in milliseconds, one entry per sample.
sampling_rate = 200  # Hz
seconds = 15
# Derive the sample period instead of hard-coding 5: 1000 ms / 200 Hz = 5 ms.
ms_per_sample = 1000 // sampling_rate
time = np.arange(0, seconds * sampling_rate) * ms_per_sample
# Build a 4th-order low-pass filter with a cut-off frequency of 5 Hz,
# just like the previous research reported.
b, a = sm.signal.build_filter(5, sampling_rate, 'low', filter_order=4)
# Filter all objects in the walking_slowly and walking_quickly lists.
filtered_objects_ADXL345 = []
fig = plt.figure(figsize=(16, 150))
time_axis = np.arange(3000) / 200

def _filter_and_plot(object_name, subplot_slot):
    # Low-pass each ADXL345 axis, store the stacked result as <name>_f in
    # globals, record the new name, and plot the filtered signals.
    filtered_axes = [sm.signal.filter_signal(b, a, g[object_name][:, axis])
                     for axis in range(3)]
    g[object_name + '_f'] = np.stack(filtered_axes, axis=-1)
    filtered_objects_ADXL345.append(object_name + '_f')
    ax = fig.add_subplot(38, 2, subplot_slot)
    for signal, colour, axis_label in zip(filtered_axes,
                                          ('blue', 'green', 'red'),
                                          ('x', 'y', 'z')):
        ax.plot(time_axis, signal, c=colour, label=axis_label)
    ax.set_ylim(-4, 4)
    ax.set_title(object_name)
    ax.legend()

# All slow-walking objects are processed (and appended) before the quick ones,
# matching the original ordering of filtered_objects_ADXL345.
for n, object_name in enumerate(walking_slowly_objects_15s, start=1):
    _filter_and_plot(object_name, 2 * n - 1)
for n, object_name in enumerate(walking_quickly_objects_15s, start=1):
    _filter_and_plot(object_name, 2 * n)
plt.show()
Counting peaks
cadence_grouped = {}
# min_dist=70 and min_val=0.45 were tuned by trial and error: there is no
# obvious principled threshold for the minimal interval / normalized
# amplitude, but after several attempts these values were found to include
# nearly all authentic peaks.
for object_name in filtered_objects_ADXL345:
    detected_peaks, _ = sm.peak.find_peaks(time, g[object_name][:, 1],
                                           peak_type='valley',
                                           min_dist=70, min_val=0.45, plot=True)
    cadence_grouped[object_name] = sm.gait.cadence(time, detected_peaks)
Visualize the cadence across walking-speed groups
# Build a DataFrame with one cadence row per trial, labelled by age group
# (SA prefix = young adults, SE prefix = elderly) and by walking mode.
cadence_grouped = pd.DataFrame.from_dict(cadence_grouped, orient='index', columns=['cadence'])
cadence_grouped['age'] = cadence_grouped.index.map(lambda name: 'young' if 'SA' in name else 'elderly')
cadence_grouped['mode'] = cadence_grouped.index.map(lambda name: 'slowly' if 'slowly' in name else 'quickly')
cadence_grouped  # display the DataFrame including cadence for all participants
# Draw a boxplot of cadence per walking mode, split by age group.
import seaborn as sns
ax = sns.boxplot(data=cadence_grouped, x='mode', y='cadence', hue='age', palette="Set2")
Except for outliers, I found that for each within-group (young vs. elderly) and between-group (slowly vs. quickly) comparison, the lower quartile of one group is higher than the upper quartile of the other, which means that when using cadence as a filtering threshold we should set different values for young and elderly participants. According to the boxplot results, I think a cadence of 100.033 for the young participant group and 112.037 for the elderly participant group is appropriate for discriminating slow from quick ambulation.
#see if we can define a filtering value
# describe() rows are: count, mean, std, min, 25%, 50%, 75%, max — so
# iloc[-2] is the 75% (upper) quartile and iloc[-4] is the 25% (lower) quartile.
print(cadence_grouped[(cadence_grouped['age']=='young') & (cadence_grouped['mode']=='slowly')].describe().iloc[-2])    #'young slowly' upper quartile
print(cadence_grouped[(cadence_grouped['age']=='young') & (cadence_grouped['mode']=='quickly')].describe().iloc[-4])   #'young quickly' lower quartile
print(cadence_grouped[(cadence_grouped['age']=='elderly') & (cadence_grouped['mode']=='slowly')].describe().iloc[-2])  #'elderly slowly' upper quartile
print(cadence_grouped[(cadence_grouped['age']=='elderly') & (cadence_grouped['mode']=='quickly')].describe().iloc[-4]) #'elderly quickly' lower quartile (original comment mislabelled this as 'young slowly')
#I'll take the average of each group's two quartiles as the filtering value.
# The constants below are transcribed from the printed output above.
young_cadence_cutoff = (96.0320119+112.037346)/2
eldely_cadence_cutoff = (104.034678+116.03868)/2  # NOTE: name keeps the original spelling ("eldely") for downstream compatibility
On the other hand, I think we should also try to distinguish walking speed through the signal magnitude in the frontal plane, because slow walking can include a longer stance phase, which could lead to overactivity of the hip abductors and potential instability in the frontal plane. So I'm going to see whether this is reflected in the frontal-axis data.
for object_name in filtered_objects_ADXL345: